In this post, we use open data and R to look at the distribution of shootings in space and time in Baltimore.
The complete code is posted in the document but the snippets are hidden unless you click them.
Baltimore releases detailed data on issues relevant to the city, including crime. Here’s the raw numbers of shootings, plotted for each day.
library(tidyverse)
library(scales)
library(knitr)
bpd <- read_csv("/Users/peterphalen/Documents/ceasefire/BPD_Part_1_Victim_Based_Crime_Data.csv")
# subset to shootings or homicides with a firearm
bpd <- subset(bpd, Description == "SHOOTING" |
(Description == "HOMICIDE" & Weapon == "FIREARM"))
bpd$CrimeDate <- as.Date(bpd$CrimeDate, format = "%m/%d/%Y")
# there are many crimes per day. collapse to daily counts
daily <- bpd %>% group_by(CrimeDate) %>% summarise(shootings = n())
# fill missing dates, because some had no shootings
full.ts <- data.frame(CrimeDate = seq(daily$CrimeDate[1],
daily$CrimeDate[nrow(daily)], by="day"))
daily <- full_join(full.ts,daily)
daily <- daily %>% group_by(CrimeDate) %>% mutate_all(funs(ifelse(is.na(.),0,.)))
ggplot(daily) +
aes(x=CrimeDate, y=shootings) +
geom_point(alpha=.2) +
xlab("date") +
ylab("daily shooting deaths (vertically jittered)") +
scale_y_continuous(breaks=0:20) +
scale_x_date(labels = date_format("%b %Y")) +
ggtitle(" ",
subtitle="Baltimore (2012-present)")
You can tap neighborhoods to see exact numbers.
This map shows the raw count of murders in Baltimore since 2012 by neighborhood.
library(geojsonio)
library(leaflet)
bpd <- subset(bpd, !is.na(Neighborhood))
# count by neighobrhood
count <- bpd %>%
group_by(Neighborhood) %>%
summarise(total.count=n())
# get polygons to draw neighborhood maps
nbds <- geojsonio::geojson_read("/Users/peterphalen/Documents/ceasefire/Neighborhoods.geojson", what = "sp")
get_shooting_count <- function(neighborhood){
nbd <- as.character(neighborhood)
if(nbd %in% count$Neighborhood){
count <- count[count$Neighborhood == nbd,]$total.count
return(count)
}
if(!(nbd %in% count$Neighborhood)){
return(0)
}
}
nbds$count <- sapply(nbds$Name, get_shooting_count)
# draw legend
range.count <- range(nbds$count,na.rm=T)
labs <- c(0,50,100,150,200,250)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')), labs)
leaflet(nbds) %>%
addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
addPolygons(stroke=T,
weight=1,
popup=paste0(nbds$Name,"<br/>Shootings: ",nbds$count),
color=~pal.crime(count),
fillOpacity=.5) %>%
addLegend("bottomright",title="# of shootings (2012-present)",colors=~pal.crime(labs),labels=~labs)
This version adjusts by the population of each neighborhood. Be careful about the super bright red areas, because some of them have very few residents and so even a single shooting will make them look really dangerous even though they’re probably not. (You can tap neighborhoods to see the number of residents.)
#--------- population-adjusted --------------#
nbds$per1k <- nbds$count / nbds$Population * 1000
nbds$per1k <- round(nbds$per1k)
nbds$per1k <- ifelse(nbds$Population == 0, NA, nbds$per1k)
labs <- c(0,25,50,75)
pal.crime <- colorNumeric(colorRamp(c('#ccccff', 'red')),
labs,
na.color = "#b2b2b2")
countlabel <- paste0(nbds$Name,"<br/>",nbds$count," shootings among ",nbds$Population," residents")
nbds$countlabel <- ifelse(nbds$Population == 0, paste0(nbds$Name,":<br/>","No residents"), countlabel)
leaflet(nbds) %>% #draw population-adjusted map,
#areas with 0 residents are greyed
#out but can still be clicked
addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
addPolygons(stroke=T,
weight=1,
popup=nbds$countlabel,
color=~pal.crime(per1k),
fillOpacity=.6) %>%
addLegend("bottomright",title="Shootings per one</br>thousand residents</br>(2012-present)",colors=~pal.crime(labs),labels=~labs)
Neighborhoods in our dataset have been categorized by district. Here is a map of all the districts. We don’t plot any neighborhoods that had zero observed shootings, which explains the holes in the map.
bpd <- bpd[complete.cases(bpd$District, bpd$Neighborhood),]
# 23 neighborhoods have been coded in two different districts, presumably on accident
# we just pick the first district for now
district.index <- bpd %>% group_by(Neighborhood) %>% summarise(District = unique(District)[1])
get_district <- function(neighborhood){
nbd <- as.character(neighborhood)
dist <- district.index[which(district.index$Neighborhood == nbd),]$District
return(dist)
}
nbds$district <- sapply(nbds$Name, get_district)
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
labs <- unique(bpd$District)
pal.districts <- colorFactor(gg_color_hue(length(labs)),
labs,
na.color="#00000000")
leaflet(nbds) %>%
addProviderTiles(providers$CartoDB.PositronNoLabels) %>%
addPolygons(stroke=T,
weight=1,
color=~pal.districts(district),
popup=nbds$Name,
fillOpacity=.5) %>%
addLegend("bottomright",title="Districts",colors=~pal.districts(labs),labels=~labs)
library(lubridate)
bpd$month <- month(bpd$CrimeDate)
bpd$year <- year(bpd$CrimeDate)
count <- bpd %>%
group_by(year, month, District) %>%
summarise(total.count=n())
count <- count[order(count$District, count$year, count$month), ]
# remove incomplete month
count <- count[!(count$month == 6 & count$year == 2019),]
count <- data.frame(temp=1:nrow(count), count)
count$date <- as.Date(with(count,
paste(year,month,1, sep="-"),
format="%m/%d/%Y"))
subset(count) %>%
ggplot() +
aes(x=date, y=total.count,group=District, color=District) +
geom_point(alpha=.2) +
stat_smooth(alpha=.5, se=F) +
xlab("date") +
ylab("Number of monthly shootings") +
theme_classic() +
ggtitle("Monthly shooting counts in Baltimore")
It looks like the southeastern district is where shootings are rising the most, but that district is still not the worst in terms of monthly shootings.